library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## ── Conflicts ──────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(httr)
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library(rjson)
##
## Attaching package: 'rjson'
## The following objects are masked from 'package:jsonlite':
##
## fromJSON, toJSON
library(data.table)
## Warning: package 'data.table' was built under R version 3.5.2
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(stringr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday,
## week, yday, year
## The following object is masked from 'package:base':
##
## date
library(leaflet)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(htmltools)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
##
## config
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Chris provided this part of the code. The idea is to pull all the session IDs connected to the username NYCEJA and to save their measurements in an R object, df.
# Query the AirCasting realtime API for the sessions of user NYCEJA and
# download the measurements of every AirBeam2 PM2.5 stream found.
real <- c("http://aircasting.org/api/realtime/sessions.json?page=0&page_size=500&q[measurements]=true&q[time_from]=0&q[time_to]=2552648500&q[usernames]=NYCEJA")
# Namespace-qualified on purpose: rjson is attached after jsonlite and masks
# fromJSON().
tt <- jsonlite::fromJSON(real)
# NOTE(review): `t` shadows base::t() for the rest of the script; the name is
# kept so that any later reference keeps working.
t <- tt$streams$'AirBeam2-PM2.5'
t <- data.table(t)
# Keep only the stream ids that are not NA.
ID <- t[!is.na(t$id)]$id
# Lookup table pairing each session title with its stream id.
name <- data.frame(tt$title, t$id)
name <- name[!is.na(name$t.id), ]
colnames(name) <- c("title", "id")
# One table of measurements per stream id. The list is preallocated and
# seq_along() is used so that an empty ID vector yields zero iterations
# (1:length(ID) would iterate over c(1, 0) and request a URL ending in NA).
dt <- vector("list", length(ID))
for (i in seq_along(ID)) {
  sess <- paste0("http://aircasting.org/api/realtime/stream_measurements.json/?end_date=2281550369000&start_date=0&stream_ids[]=", ID[i])
  s1 <- jsonlite::fromJSON(sess)
  s1 <- data.table(s1)
  # Tag every measurement with the stream it came from.
  s1$ID <- ID[i]
  dt[[i]] <- s1
}
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:lubridate':
##
## here
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
# Row-bind the per-stream tables held in `dt` into a single data frame.
df <- plyr::ldply(.data = dt, .fun = data.frame)
Check df. These data contain measurements from 23 locations (represented by 23 IDs), generated every minute from 2018-06 to 2019-03. Note that the earliest dates are in 2000, which does not make sense. They are more likely to be from 2019, so I will change every 2000 to 2019.
# Preview the first rows of df as a formatted table.
knitr::kable(head(df))
| time | value | latitude | longitude | ID |
|---|---|---|---|---|
| 2019-03-27T11:52:18Z | 1 | 40.81336 | -73.95920 | 297507 |
| 2019-03-27T11:53:18Z | 3 | 40.81336 | -73.95920 | 297507 |
| 2000-01-01T01:44:01Z | 2 | 40.81390 | -73.95864 | 268832 |
| 2000-01-01T03:18:42Z | 6 | 40.81390 | -73.95864 | 268832 |
| 2000-01-01T03:26:43Z | 5 | 40.81390 | -73.95864 | 268832 |
| 2000-01-01T03:27:57Z | 5 | 40.81390 | -73.95864 | 268832 |
A bit of data cleaning and manipulation.
# Clean the raw measurements: patch the implausible years, parse the
# timestamps, store the stream id as a character `station_id`, and drop rows
# whose timestamp could not be parsed.
#
# dplyr verbs are namespace-qualified because plyr is attached after dplyr and
# masks mutate(), rename(), arrange() and summarise().
air_data <- df %>%
  as_tibble() %>%
  dplyr::mutate(
    # Anchor on the start of the string so that only the year field is
    # rewritten, never a matching run of digits later in the timestamp.
    time = str_replace(time, "^2000", "2019"),
    time = str_replace(time, "^1999", "2018"),
    # ymd_hms() reads ISO 8601 strings ("2019-03-27T11:52:18Z") directly, so
    # the "T" and "Z" markers do not need to be stripped beforehand.
    time = ymd_hms(time),
    ID = as.character(ID)
  ) %>%
  dplyr::rename(
    station_id = ID
  ) %>%
  dplyr::filter(
    !is.na(time)
  )
First of all, we want to know where these stations are. I will use the leaflet package to plot these locations on top of OpenStreetMap.
# Distinct station coordinates, each labelled "lat, lng" rounded to two
# decimals, drawn as green circle markers on an OpenStreetMap base layer.
location <- df %>%
  distinct(latitude, longitude) %>%
  # NOTE(review): rows 9 and 20 are dropped by position, which depends on the
  # order in which the API returned the streams. Confirm which coordinates
  # are meant and filter on them explicitly.
  slice(-9, -20) %>%
  # Qualified because plyr is attached after dplyr and masks mutate().
  dplyr::mutate(
    name = str_c(
      round(latitude, digits = 2),
      round(longitude, digits = 2),
      sep = ", "
    )
  )
leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    data = location,
    lat = ~latitude, lng = ~longitude,
    color = "green",
    # Escape the label text before it is rendered in the browser.
    label = ~htmlEscape(name)
  )
Now I will look at each station. Once we have more information about these stations, I will scale the size of each circle according to the station's average PM2.5 level.
For each location, we want to know how PM2.5 changes over time. Because there are too many data points, I will reduce them to an hourly average and plot each location.
For example, station 3 is at (40.7109366, -73.9596247). Let’s plot it.
# Measurements of stream 246560 ("station 3"), with the calendar date and
# hour of day of every reading.
# dplyr verbs are namespace-qualified because plyr is attached after dplyr and
# masks mutate(), arrange() and summarise().
location_3 <- air_data %>%
  dplyr::filter(
    station_id == "246560"
  ) %>%
  dplyr::mutate(
    measure_date = as.Date(time),
    measure_hour = lubridate::hour(time)
  )
# Hourly mean PM2.5 plotted in chronological order. The original pipeline
# computed the date/hour keys and ended with ungroup(), but never aggregated,
# so every raw reading was plotted instead of the hourly average.
loc_3 <- location_3 %>%
  dplyr::group_by(measure_date, measure_hour) %>%
  dplyr::summarise(value = mean(value, na.rm = TRUE)) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(measure_date, measure_hour) %>%
  dplyr::mutate(
    # "date hour" label for each aggregated point.
    timepoint = str_c(measure_date, measure_hour, sep = " "),
    # Sequential index of the hour, used as the x axis.
    row_num = dplyr::row_number()
  ) %>%
  ggplot(aes(x = row_num, y = value)) + geom_line() + geom_smooth()
ggplotly(loc_3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'